********************************************************************************
*   Title: make_kinky.do
* Purpose: Produces the many datasets for the final published version.
*          See main() program at the bottom for organization.
*   Owner: Eric Zwick
* Authors: Qiping Xu, Eric Zwick
*     RAs: Tom (Tianfang) Cui, Laurence O'Brien, Harleen Kaur,
* 		   Francesco Ruggieri, Caleb Wroblewski, Ramiro Rossi
*    Date: 2024-05-02
********************************************************************************

/* Input Datasets 
   Compustat:    
              anncomp                   /wrds/comp/sasdata/execcomp/anncomp 
              compustat_sp_rating       /wrds/comp/sasdata/nam/rating/adsprate 
              compustat_company         /wrds/comp/sasdata/nam/company/company 
			  compustat_segmerged       /wrds/comp/sasdata/seghist/wrds_segmerged
			  compustat_seg_customer    /wrds/comp/sasdata/seghist/wrds_seg_customer 
              funda                     /wrds/comp/sasdata/nam/funda + 
				                        /wrds/comp/sasdata/nam/company/company 
			  fundq                     /wrds/comp/sasdata/nam/fundq 
			  internationala            /wrds/comp/sasdata/global/gfunda 				 
			  internationalq            /wrds/comp/sasdata/global/gfundq

	IBES      statsum_epsus             /wrds/ibes/sasdata/statsum_epsus	             
	Orbis     orbis_layer               # layers of subsidiaries generated from
	                                    Orbis - Bureau van Dijk 
										(shared by Jim Goldman & Stefan Zeume)
										 
	BLS_PPI
	M3_survery 
	RateWatch 
	RD 
	leases_and_loans                        http://www.elfaonline.org/data/MLFI
*/

********************************************************************************
*   PRELIMINARY DATASETS
********************************************************************************

* These programs are run before the main Compustat datasets are cleaned 
* These data are merged onto the main datasets

********************************************************************************
* Segment information 

capture program drop clean_segment /* %< */
program define clean_segment 
	
	use "$analysis/Compustat/compustat_segmerged.dta", clear
	
	drop if stype=="OPSEG"|stype=="STSEG"|stype=="GEOSEG"
	rename *, lower
	order gvkey datadate srcdate sid sics1 sics2 sales ///
		  rds capxs ias oibdps snms emps
	bysort gvkey datadate sid: egen maxsrc=max(srcdate)
	format maxsrc %td
	drop if srcdate!=maxsrc
	drop maxsrc
	replace capxs=0 if capxs<0
	replace sales=0 if sales<0
	sort gvkey datadate srcdate
	
	foreach x of varlist sales capxs ias ops dps emps{
		by gvkey datadate: egen total_`x'=total(`x'), missing
		by gvkey datadate: egen seg_`x'_sd=sd(`x')
		by gvkey datadate: egen seg_`x'_mean=mean(`x')
		gen seg_`x'_sd2mean=seg_`x'_sd/seg_`x'_mean
	}

	* Total number of segments reported
	bysort gvkey datadate: egen seg_num=count(sid)

	* Different sic code reported, four digits, three digits, two digits
	gen sics_3=substr(sics1,1,3)
	destring sics_3, replace
	gen sics_2=substr(sics1,1,2)
	destring sics_2, replace
	destring sics1, replace
	duplicates drop gvkey datadate sics1, force
	by gvkey datadate: egen sic4_count=count(sics1)
	duplicates drop gvkey datadate sics_3, force
	by gvkey datadate: egen sic3_count=count(sics_3)
	duplicates drop gvkey datadate sics_2, force
	by gvkey datadate: egen sic2_count=count(sics_2)
	
	la var seg_num "total segment number"
	la var sic2_count "number of sic2 digits in segments"
	la var sic3_count "number of sic3 digits in segments"
	la var sic4_count "number of sic4 digits in segments"
	
	gen gvkey_s=gvkey
	destring gvkey, replace
	order gvkey datadate srcdate sid sics1 sics_3 sics_2 seg_num ///
		  sic4_count sic3_count sic2_count sales  capxs ias ops dps ///
		  intseg emps  naicsh naicss1 snms 
	replace sic4_count=. if sic4_count==0
	replace sic3_count=. if sic3_count==0
	replace sic2_count=. if sic2_count==0
	keep gvkey datadate  seg_num sic4_count sic3_count sic2_count ///
		 total_* *sd2mean  *_sd *_mean
	duplicates drop gvkey datadate, force
	
	save "$analysis/sicsegment.dta", replace 

end /* %> */

********************************************************************************
* Executive ownership data

capture program drop clean_executive /* %< */
program define clean_executive 

	use "$analysis/Compustat/anncomp.dta", clear
	
	foreach v of varlist _all {
	capture rename `v' `=lower("`v'")'
	}
	
	bysort gvkey year: gen count=_N 
	su count, de  
	keep gvkey year shrown_tot shrown_tot_pct shrown_excl_opts ///
		 shrown_excl_opts_pct ceoann cfoann 
	
	foreach x in ceo cfo{
		gen `x'pct= shrown_excl_opts_pct if `x'ann!=""
		bysort gvkey year: egen `x'_pct=max(`x'pct)
		drop `x'pct
	}

	gsort gvkey year -shrown_excl_opts_pct
	bysort gvkey year: gen rank=_n 
	bysort gvkey year: egen top5pct=total(shrown_excl_opts_pct) if rank<=5
	bysort gvkey year: egen top5exe_pct=max(top5pct)
	order gvkey year shrown_excl_opts_pct rank top5*
	drop top5pct
	bysort gvkey year: egen allexe_pct=total(shrown_excl_opts_pct)
	bysort gvkey year: keep if _n==1
	
	foreach x in  *_pct{
		winsor2 `x', replace cut(1 99)
	}

	keep gvkey year ceo_pct cfo_pct top5exe_pct allexe_pct
	destring gvkey, replace 
	rename year fyear

	save "$analysis/exeownership.dta", replace 

end /* %> */

********************************************************************************
* Earnings Surprise data

capture program drop clean_earnings /* %< */
program define clean_earnings

use "$analysis/IBES/statsum_epsus.dta", clear
rename *, lower
keep if fiscalp=="QTR" & fpi=="6"&curr_act=="USD"&actual!=.
keep if curcode=="USD"
*Remove if consensus records (or stated quarter ends) are 1 day before, on, or after the earnings announcement
drop if statpers>=anndats-1 | anndats<=fpedats
gen cqrt=qofd(fpedats)
format cqrt %tq

//keep the estimates cloest to the announcement date
bysort ticker fpedats: gen count=_N
bysort ticker fpedats: egen maxdate=max(statpers)
drop if count>1&statpers!=maxdate
drop if (fpedats-statpers)>31
rename stdev eps_stdev
rename numest eps_numest
rename ticker ibtic
rename oftic tic
egen groupid=group(ibtic)
duplicates drop groupid cqrt, force
tsset groupid cqrt

//generate how much the actual beats the forecase
gen eps_median_surprise_q=(actual-medest)
gen eps_mean_surprise_q=(actual-meanest)

foreach x in eps_median_surprise_q eps_mean_surprise_q{
winsor2 `x', cut(1 99) replace
}
keep ibtic tic statpers fpedats cqrt anndats_act medest meanest actual eps_* fiscalp groupid cqrt  eps_numest
gen month=month(fpedats)
gen year=year(fpedats)
gen qend_month=ym(year, month)
format qend_month %tm
gen quarter=quarter(fpedats)

drop if eps_median_surprise_q==.&eps_mean_surprise_q==.
label var eps_median_surprise_q "Median EPS Estimate - Actual EPS"
label var eps_mean_surprise_q "Mean EPS Estimate - Actual EPS"
keep ibtic qend_month medest meanest actual eps_*  year month quarter cqrt 
save "$analysis/earnings_surprise_quarterly.dta", replace

end /* %> */


********************************************************************************
* Generate accountyingy.dta for gvkey-fyear level information


capture program drop build_accountingy /* %< */
program define build_accountingy 

	* Compustat firm rating
	use "$analysis/Compustat/compustat_sp_rating.dta",clear

	destring gvkey,replace             
	gen year=year(datadate)
	gen month1=month(datadate)
	gen month=ym(year,month)
	format month %tm
	duplicates drop gvkey month splticrm, force
	
	gen     sp_rating=1 if splticrm=="AAA" 
	replace sp_rating=2 if splticrm=="AA+"
	replace sp_rating=3 if splticrm=="AA"
	replace sp_rating=4 if splticrm=="AA-"
	replace sp_rating=5 if splticrm=="A+"
	replace sp_rating=6 if splticrm=="A"
	replace sp_rating=7 if splticrm=="A-"
	replace sp_rating=8 if splticrm=="BBB+"
	replace sp_rating=9 if splticrm=="BBB"
	replace sp_rating=10 if splticrm=="BBB-"
	replace sp_rating=11 if splticrm=="BB+"
	replace sp_rating=12 if splticrm=="BB"
	replace sp_rating=13 if splticrm=="BB-"
	replace sp_rating=14 if splticrm=="B+"
	replace sp_rating=15 if splticrm=="B"
	replace sp_rating=16 if splticrm=="B-"
	replace sp_rating=17 if splticrm=="CCC+"
	replace sp_rating=18 if splticrm=="CCC"
	replace sp_rating=19 if splticrm=="CCC-"
	replace sp_rating=20 if splticrm=="CC"
	replace sp_rating=21 if splticrm=="C"
	replace sp_rating=25 if splticrm=="D" 
    
	tab splticrm if sp_rating==.
	drop if sp_rating==.

	* Generate an indicator for investment grade
	gen fy_junkgrade=0 if sp_rating<11
	replace fy_junkgrade=1 if sp_rating>=11
	keep gvkey  sp_rating month splticrm
	destring gvkey, replace
	label var sp_rating "S&P Long Term Credit Rating"
	compress
	* Save cleaned data 
	save "$analysis/sp_rating.dta", replace

	* Bring in more Compustat fund information
	use "$analysis/Compustat/funda.dta", clear

	keep if stko==0|stko==3
	drop if at==0
	#delimit ;
	set more off;
	set linesize 101;
	drop if fyear==.;
	
	* Set as panel data;
	gen gvkey_s=gvkey; destring gvkey, replace;
	tsset gvkey fyear;
	
	* Replace missing data as 0 for certain variables;
	replace dvp=0 if dvp==.; 
	replace dvc=0 if dvc==.;
	replace dv=dvc+dvp if dv==.|dv<0;
	replace txdb=0 if txdb==.; 
	replace prstkc=0 if prstkc==.; 
	replace dltr=0 if dltr<0; 
	replace dltis=0 if dltis<0;
	
	* Limit to zero;
	foreach v in dltt at lt capx aqc  xrd xad  ceq che xsga  seq sale ppent dpc xint dvc{;
		replace `v'=0 if `v'<0;
	};
	
	* Basic asset debt and equity data;
	gen bd=dltt+dlc;     gen me = csho*prcc_f;  gen be=at-lt+txditc; 

	* Market to book;
	gen mkt2bk=(at-ceq+csho*prcc_f)/at;  
	
	* Payout;
	gen paydiv=(dvc+dvp>0 ); 

	* Investment to ppe ratios;
	gen capx2ppe=capx/l.ppent; 
	
	* Stuff to at ratios;
	gen cf2at=(ib+dpc)/l.at;
	gen res2at=che/at;
	gen capx2at=capx/l.at; 
	gen roa=ib/L.at;
	gen tangible=ppent/at;
	gen xrd2at=xrd/L.at; 
	gen debt2at=(dlc+dltt)/at; 
	gen ebitda2at=ebitda/at; 
	
	* Q;
	gen q2=(at+csho*prcc_f-ceq-txdb)/at;  

	* Limit to 0 and winsorize top;
	foreach v in mkt2bk capx2ppe capx2at {;
		replace `v'=0 if `v'<0;
		winsor `v', gen(temp) p(0.01) highonly;
		drop `v';
		rename temp `v';
	};

	* Winsorize both sides;
	foreach v in cf2at res2at ebitda2at q2 roa{;
		winsor2 `v', cut(1 99) replace;
	};

	* Calendar year and month;
	rename fyr cal_month;
	gen cal_year=fyear+(cal_month<=5);
	gen month=ym(cal_year, cal_month);
	format month %tm;
	gen fyr=cal_month; la var fyr "fiscal year end month";
	sort gvkey fyear;
	duplicates tag gvkey fyear, gen(dup); assert dup==0; drop dup;
	compress;
	#delimit cr

	* Keep useful variables
	keep gvkey fyear datadate naicsh naics sich tic cusip sic cal_* month  fyr /// 
		 stko fic loc fic ein at che capx  xrd  ppent oibdp ebit ebitda sale ib ///
		 txp cf2at res2at capx2at  xrd2at roa ebitda2at dlc dltt ceq debt2at ///
		 mkt2bk paydiv capx2ppe dp q2 
	destring sic, replace
	ffind sic, newvar(ff12) type(12)
	drop if datadate==.

	mmerge gvkey datadate using "$analysis/Compustat/sicsegment.dta", ///
		   type(1:1) unmatched(master) noshow
	drop _merge
	
	* Add in the duration data from Zwick Mahon 2017
	tostring naicsh, replace
	gen four_digit_naics=substr(naics, 1,4) if strlen(naics)==6
	replace four_digit_naics=substr(naics,1,4) if strlen(naics)==4
	destring four_digit_naics, replace
	
	merge m:1 four_digit_naics using "$analysis/naics_wpv.dta", keep(1 3) nogen
	#delimit
	;
	la var mkt2bk "market to book ratio ";
	la var paydiv "dividend payment dummy";
	la var capx2ppe "capital expenditure to lagged ppe ratio"; 
	la var cf2at "cash flow to lagged assets ratio";  
	la var res2at "cash reserves to assets ratio";
	la var capx2at "capital expenditure to lagged assets ratio";
	la var ebitda2at "ebitda to asset ratio";
	la var q2 "Tobin's"
	; 
	#delimit cr
	compress

	* Add in S&P long term rating
	* Keep any rating that is within 6 months before the fyr end 
	preserve 
	
	keep gvkey fyear datadate
	gen fyrend_month=ym(year(datadate), month(datadate))
		
	mmerge gvkey using "$analysis/Compustat/sp_rating.dta", ///
		   type(n:n) unmatched(master)
	gen diff=fyrend_month-month
	bysort gvkey fyrend_month: egen minabsdiff=min(abs(diff))
	drop if minabsdiff!=. &abs(diff)!=minabsdiff
	drop minabsdiff
	bysort gvkey fyrend_month: gen count=_N
	drop if count>1&diff<0
	drop count
	replace sp_rating=. if diff<-6&diff!=.
	replace sp_rating=. if diff>6&diff!=.
	drop diff
	rename month splticrm_month
	drop _merge
	keep gvkey fyear fyrend_month splticrm_month sp_rating
	drop if sp_rating==.
		
	tempfile sp_rating 
	save "`sp_rating'"
		
	restore
	
	* Generate accountingy.dta 
	gen fyrend_month=ym(year(datadate), month(datadate))
	
	merge 1:1 gvkey fyear using "`sp_rating'", keep(1 3) nogen
	gen junkrating=(sp_rating>=10&sp_rating!=.)
	replace junkrating=. if sp_rating==.
	gen fp=(ebit-txp-capx<0)
	replace fp=. if ebit==.|txp==.|capx==.
	gen nr=(sp_rating==.)
	gen fp2=fp*nr
	
	la var junkrating "Speculative Grade"
	la var fp  "Faulkender-Petersen I"
	la var fp2 "Faulkender-Petersen II"
	
	compress
	
	save "$analysis/accountingy.dta", replace

end /* %> */

********************************************************************************
* Quarterly data cleaning 

capture program drop clean_quarterly /* %< */
program define clean_quarterly

   use "$analysis/Compustat/fundq.dta", clear
   
   keep if datafqtr!=""
   gen gvkey_s=gvkey
   destring gvkey, replace
   gen fyearquarter=yq(fyearq, fqtr)
   bysort gvkey fyearquarter: keep if _n==1
   tsset gvkey fyearquarter
   rename fqtr fquarter 
   rename fyearq fyear
   gen quarter=substr(datacqtr,6,1)
   gen year=substr(datacqtr,1,4)
   destring quarter year, replace 
   
	* Generate quarterly inventory data   
	preserve 
   
	gen invcq=d.invtq
	keep gvkey fyear fquarter invtq  invcq
	reshape wide invtq invcq, i(gvkey fyear) j(fquarter)
	save "$analysis/inventory.dta", replace
		
	restore  
   
	* Variables ending in y represent fiscal year to date;
	* Convert fiscal year to date variables to quarterly;
	tsset gvkey fyearquarter
	
	foreach v in sppiv aqc capx dpc dltis dltr{
		g `v'q=`v'y if fquarter==1
		replace `v'q=S.`v'y if fquarter>=2 & fquarter<=4
	}

	foreach x in dltisq dltrq dltry dltisy{
		replace 	`x'=0 if `x'<0 
	}
	
	gen bdq=dlcq+dlttq
	keep gvkey fquarter fyear year quarter fyearquarter atq saleq saley xoprq ///
		 xopry revtq revty cogsq cogsy xsgaq xsgay dpq dpy oiadpq oiadpy oibdpq ///
		 oibdpy capxy ibq iby  dpcy scfq  aqcq  aqaq aqpq capxq  dpcq xrdq ///
		 xrdy dlcq dlttq  dltisq dltisy dltry dltrq bd  bdq
	
	order gvkey fquarter fyear year quarter fyearquarter atq saleq saley xoprq ///
		  xopry revtq revty cogsq cogsy xsgaq xsgay dpq dpy oiadpq oiadpy oibdpq ///
		  oibdpy capxy ibq iby  dpcy scfq aqcq  aqaq  aqpq  capxq  dpcq xrdq ///
		  xrdy dlcq dlttq

	tsset gvkey fyearquarter
	replace dpcq=0 if dpcq==.
	gen cfq=ibq+dpcq
	gen cfy=.
	
	* Amount of net issuance
	gen netdebtissueq=d.bdq
	gen netltdissueq=d.dlttq
	gen netdebtissuey=.
	gen netltdissuey=.

	foreach x in sale xopr revt cogs xsga dp oiadp oibdp  ib xrd cf dpc ///
	             netdebtissue netltdissue dltis dltr{
		replace `x'y=`x'q  if  (fquarter==1&`x'y==.)
		replace `x'y=`x'q+l.`x'q if (fquarter==2&`x'y==.)
		replace `x'y=`x'q+l.`x'q+l2.`x'q if  (fquarter==3&`x'y==.) 
		replace `x'y=`x'q+l.`x'q+ l2.`x'q+ l3.`x'q if (fquarter==4&`x'y==.) 
	}
	
	gen qendmonth=.
	
	forvalues i=1/4{
		replace qendmonth=3*quarter if quarter==`i'
	}
	
	gen qend_month=ym(year, qendmonth)
	gen yearquarter=yq(year, quarter)
	drop if qend_month==.
	compress
	
	save "$analysis/quarterlydata.dta", replace

	*generate forward data
	forvalue i=1/1{
	use "$analysis/quarterlydata.dta", clear
	keep gvkey fyear fquarter saleq xrdq dpq dpcq capxq  dltisq dltrq  cfq oibdpq
	replace fyear=fyear-`i'
	foreach x in saleq xrdq dpq dpcq capxq dltisq dltrq cfq oibdpq{
	rename `x' `x'f`i'
	}
	reshape wide  saleqf xrdqf dpqf dpcqf capxqf dltisqf dltrqf cfqf oibdpqf, i(gvkey fyear) j(fquarter)
	save "$analysis/capxf`i'.dta", replace
	}
	
end /* %> */

********************************************************************************
* Merging data to create mergeddata.dta

capture program drop build_mergeddata /* %< */
program define build_mergeddata

	* Generate forward data 
	forvalue i=1/1{
		use "$analysis/quarterlydata.dta", clear
		keep gvkey fyear fquarter saleq xrdq dpq dpcq capxq dltisq dltrq cfq oibdpq
		replace fyear=fyear-`i'
			foreach x in saleq xrdq dpq dpcq capxq dltisq dltrq cfq oibdpq{
			rename `x' `x'f`i'
		}
		reshape wide  saleqf xrdqf dpqf dpcqf capxqf dltisqf dltrqf cfqf oibdpqf, i(gvkey fyear) j(fquarter)
		save "$analysis/capxf`i'.dta", replace
	}

	use "$analysis/quarterlydata.dta", clear
	merge n:1 gvkey using "$analysis/Compustat/gvkey_ibtic.dta", keep(1 3) nogen 
	drop if ibtic1!=""|ibtic2!=""|ibtic3!=""
	drop ibtic1 ibtic2 ibtic3
	tempfile missingibticq
	save "`missingibticq'"

	* Add in the earnings annoucement information (how much does a firm beat the
	* earning estimate ) IBES data sorted. 
	* There might be multiple ibtic corresponding to one gvkey
	forvalue i=1/3{
		use "$analysis/quarterlydata.dta", clear
		merge n:1 gvkey using "$analysis/Compustat/gvkey_ibtic.dta", keep(1 3) nogen 
		keep if ibtic`i' != ""
		rename ibtic`i' ibtic
		mmerge ibtic qend_month using "$analysis/earnings_surprise_quarterly.dta", ///
		       type(1:1) unmatched(master) 
		drop _merge ibtic*
		tempfile ibticq`i'
		save "`ibticq`i''"
	}
	
	use "`missingibticq'"
	append using "`ibticq1'"
	append using "`ibticq2'"
	append using "`ibticq3'"
	
	order gvkey fyear  fquarter
	bysort gvkey fyear fquarter: gen count=_N
	bysort gvkey fyear fquarter: egen maxsurprise= max(eps_mean_surprise)
	drop if count>1&maxsurprise!=.&eps_mean_surprise==.
	drop count
	bysort gvkey fyear fquarter: drop if  eps_mean_surprise!=maxsurprise
	bysort gvkey fyear fquarter: keep if _n==1
	drop   qend_month maxsurprise 
	drop fyearquarter month year quarter
	reshape wide  atq-eps_mean_surprise_q , i(gvkey fyear) j(fquarter)
	
	merge 1:1 gvkey fyear using "$analysis/accountingy.dta", keep(2 3) nogen
	replace fyrend_month=ym(cal_year, fyr)
	bysort gvkey fyear: keep if _n==1
	compress
	tsset gvkey fyear
	gen lq=l.q2
	gen l_at=log(at)
	
	save "$analysis/mergeddata.dta", replace

end /* %> */

********************************************************************************
* Final variable cleaning before creating main datasets 

capture program drop finalclean /* %< */
program define finalclean

	forvalue i=1/1{
		merge 1:1 gvkey fyear using "$analysis/capxf1.dta", keep(1 3)  nogen
	}

	foreach x in  sale  xrd dp dpc capx  dltis dltr{
		replace `x'y3=. if  `x'y3==0
		replace `x'y4=. if  `x'y4==0
		gen `x'4_3=`x'q4/(`x'y3/3)
		gen `x'4_4=`x'q4/(`x'y4/4)
		gen `x'1_4=`x'q1/(`x'y4/4)
		gen `x'2_4=`x'q2/(`x'y4/4)
		gen `x'3_4=`x'q3/(`x'y4/4)
	}
	
	foreach x in  sale  xrd dp dpc capx  dltis dltr{
		replace `x'y3=. if  `x'y3==0
		gen `x'4f1_3=((`x'q4+`x'qf11)/2)/(`x'y3/3)
	}
	
	* Generate a measure to compare the Q4 leverage and net debt issue vs Q1-Q3
	foreach x in bd dltt dlc{
		gen `x'4_3=`x'q4*3/(`x'q1+`x'q2+`x'q3)
		gen `x'4_4=`x'q4*4/(`x'q1+`x'q2+`x'q3+`x'q4)
	}
	
	gen oibdpy32at=oibdpy3/at
	
	* Winsorize both sides
	foreach v in cf2at capx2at res2at ebitda2at  mkt2bk  q2 capx2ppe{
		winsor2 `v', cut(1 99) replace
	}
	
	replace capx4_3=. if capxq4<0 |capxy3<0
	replace capx4_4=. if capxq4<0 |capxy4<0
	replace capx2_4=. if capxq2<0 |capxy4<0
	replace capx3_4=. if capxq3<0 |capxy4<0
	replace capx1_4=. if capxq1<0|capxy4<0
	replace capx4f1_3=. if capxq4<0 |capxy3<0|capxqf11<0

end /* %< */

capture program drop finalclean2 /* %< */
program define finalclean2

	foreach x in  sale  xrd dp dpc capx  dltr dltis bd dltt dlc{
		winsor2 `x'4_3, cut(3 97) replace 
		replace `x'4_3=`x'4_3*100
	}
	
	foreach x in  sale  xrd dp dpc capx  dltr dltis{
		forvalues i=1/4{
			winsor2 `x'`i'_4, cut(3 97) replace
			replace `x'`i'_4=`x'`i'_4*100
		}
	}
	
	gen capx4_y=capx4_4/4
	winsor2 capx4f1_3, cut(3 97) replace 
	replace capx4f1_3=capx4f1_3*100
	
	la var junkrating "Speculative Grade"
	la var fp  "Faulkender-Petersen I"
	la var fp2 "Faulkender-Petersen II"
	la var paydiv "Dividend Payers"
	la var sp_rating "S\&P Rating"
	la var mkt2bk "M/B"
	la var res2at "$\frac{Cash Holdings}{Asset}$"
	la var capx2ppe " $\frac{CAPEX}{PPE}$"
	la var sale4_3  "Sales $\frac{Q4}{Ave(Q1-Q3)}$"
	la var ebitda2at "$\frac{EBITDA}{Asset}$"
	la var cf2at  " $\frac{Cash Flow}{Asset}$"
	la var capx4_3 "CAPEX $\frac{Q4}{Ave(Q1-Q3)}$\%"
	la var dlc4_3 "Current Debt $\frac{Q4}{Ave(Q1-Q3)}$\%"
	la var dltt4_3  "Long Term Debt $\frac{Q4}{Ave(Q1-Q3)}$\%"
	la var dltis4_3 "Debt Issue $\frac{Q4}{Ave(Q1-Q3)}$\%"
	la var dltr4_3  "Debt Repayment $\frac{Q4}{Ave(Q1-Q3)}$\%"
	la var bd4_3  "Total Debt $\frac{Q4}{Ave(Q1-Q3)}$\%"
	la var capx4f1_3 "CAPEX $\frac{(Q4+f.Q1)}{Ave(Q1-Q3)}$\%"
	la var capx2_4 "CAPEX $\frac{Q2}{Ave(Q1-Q4)}$\%"
	la var capx3_4 "CAPEX $\frac{Q3}{Ave(Q1-Q4)}$\%"
	la var capx4_4 "CAPEX $\frac{Q4}{Ave(Q1-Q4)}$\%"
	la var capx4_y "CAPEX $\frac{Q4}{Total(Q1-Q4)}$\%"
	la var dlc4_4 "Current Debt $\frac{Q4}{Ave(Q1-Q4)}$\%"
	la var dltt4_4  "Long Term Debt $\frac{Q4}{Ave(Q1-Q4)}$\%"
	la var dltis4_4 "Debt Issue $\frac{Q4}{Ave(Q1-Q4)}$\%"
	la var dltr4_4  "Debt Repayment $\frac{Q4}{Ave(Q1-Q4)}$\%"
	la var bd4_4  "Total Debt $\frac{Q4}{Ave(Q1-Q4)}$\%"
	la var ppe "PPE(Mils)"
	la var sale "Sales (Mils)"
	la var at "Assets (Mils)"
	la var capx "CAPEX (Mils)"
	la var dp "Depreciation (Mils)"
	
end
********************************************************************************

********************************************************************************
*   MAIN DATASETS 
********************************************************************************

********************************************************************************
* Creating capxannual, capxannual_p, capxannual_trim5, capxannual_trim5pre83

capture program drop build_capxannual /* %< */
program define build_capxannual 

******************************
* Create capxannual

	
	use "$analysis/mergeddata.dta", clear

	* Generate lagged asset and q for investment-cash flow sensititivy test
	drop if fyear<1984 | fyear>2016
	finalclean
	merge 1:1 gvkey fyear using "$analysis/inventory.dta", keep( 1 3) nogen
	
	save "$analysis/capxannual.dta", replace
	
******************************************
* Create capxannual_p

	
	drop if at<10|at==.
	drop if dlc+dltt+ceq<10 
	drop if ff12==11|ff12==8   
	drop if capx4_3==. 
	finalclean2

	preserve 
	
	winsor2 capx4_3, cuts(3 97) trim suffix(_3p) label 
	winsor2 capx4_3, cuts(5 95) trim suffix(_5p) label 
	winsor2 capx4_3, cuts(7 93) trim suffix(_7p) label 
		
	save "$analysis/capxannual_p.dta", replace 
	
	restore 

**************************************************************
* Create capxannual_trim5

	
	egen maxcapx4_3=max(capx4_3)
	egen mincapx4_3=min(capx4_3)
	drop if capx4_3>500 |capx4_3==mincapx4_3  
	replace dltis4_3=. if dltis4_3>500&dltis4_3!=.
	drop mincapx4_3 maxcapx4_3
	
	save "$analysis/capxannual_trim5.dta", replace
	
******************************	
* Create capxannual_trim5pre83

	* Need pre83 data for cumulative event_regression and plot 
	use "$analysis/mergeddata.dta", clear
	
	finalclean // see program finalclean above
	
	drop if at<10|at==.
	drop if dlc+dltt+ceq<10 
	drop if ff12==11|ff12==8   
	
	finalclean2 // see program finalclean2 above 
	
	compress
	egen maxcapx4_3=max(capx4_3)
	egen mincapx4_3=min(capx4_3)
	drop if (capx4_3>500&capx4_3!=.) |(capx4_3==mincapx4_3&capx4_3!=.)  
	drop mincapx4_3 maxcapx4_3
	
	save "$analysis/capxannual_trim5pre83.dta", replace

end /* %> */

********************************************************************************
* International data cleaning and creating data set international_trim5
* Figure 2 utilizes these data 
* Table 2 utilizes these data 

capture program drop build_international /* %< */
program define build_international 

	use "$analysis/Compustat/internationala.dta", clear
	
	destring gvkey, replace
	duplicates drop gvkey fyear, force
	tsset gvkey fyear
	gen capx2ppe=capx/L.ppent
	la var capx2ppe "CAPEX/PPE"
	gen cf2at=(ib+dpc)/L.at
	la var cf2at "cash flow to lagged assets ratio"
	gen res2at=che/at
	la var res2at "cash reserves to assets ratio"
	gen ebitda2at=ebitda/l.at
	gen l_at=log(at)
	gen me=cshoc*prccd/1000000
	gen mkt2bk=(at-ceq+me)/at
	la var mkt2bk "market to book ratio "
	keep gvkey fyear capx2ppe res2at ebitda2at l_at cf2at mkt2bk at ppent sale
	
	tempfile internationala
	save "`internationala'"

	use "$analysis/Compustat/internationalq.dta", clear
 
	keep if datafqtr!=""
	gen gvkey_s=gvkey
	destring gvkey, replace
	gen fyearquarter=yq(fyearq, fqtr)
	drop if substr(sic,1,1)=="6"
	destring sic, replace
	drop if (sic>=4900&sic<=4949)
	drop if atq<0|atq==.
	sort gvkey datafqtr datadate, stable
	bysort gvkey datafqtr: keep if _n==_N
	tsset gvkey fyearquarter
	
	* Variables ending in y represent fiscal year to date;
	* Convert fiscal year to date variables to quarterly;
	foreach v in capx dp dpc{
		g `v'q=`v'y if fqtr==1
		replace `v'q=S.`v'y if fqtr>=2 & fqtr<=4
	}
	
	gen cfq=ibq+dpcq
	gen cfy=.
	
	foreach x in sale oiadp oibdp  ib cf{
		replace `x'y=`x'q  if  (fqtr==1&`x'y==.)
		replace `x'y=`x'q+l.`x'q if (fqtr==2&`x'y==.)
		replace `x'y=`x'q+l.`x'q+l2.`x'q if  (fqtr==3&`x'y==.) 
		replace `x'y=`x'q+l.`x'q+ l2.`x'q+ l3.`x'q if (fqtr==4&`x'y==.) 
	}
	
	drop fyr acctstdq datafqtr datadate fyearquarter 
	reshape wide atq-saley cfy capxq dpq dpcq cfq  , i(gvkey fyearq) j(fqtr)
	drop if capxy4<0|capxq1<0|capxq2<0|capxq3<0|capxq4<0
	drop if capxq4==.
	drop if capxq1==.&capxq2==.&capxq3==.
	drop if capxy3<0
	rename fyearq fyear
	
	* Drop countries where number of observations per year is at most <50
	bysort fic fyear: gen count = _N
	bysort fic: egen maxcount = max(count)
	unique fic if maxcount<50
	drop if maxcount<50

	* Remove country-year with fewer than 50 observations; 
	* After making sure that the years with few observations are concentrated ///
	* in earlier (not the middle) years, drop these years 
	preserve
	
	keep fic fyear count
	duplicates drop
		
	restore
	
	drop if count <50
	egen maxfyear = max(fyear),by(fic)
	egen minfyear=min(fyear), by(fic)
	gen length=maxfyear-minfyear
	drop if length<5
	drop count maxcount
	
	merge 1:1 gvkey fyear using "`internationala'", keep(3)

	* Majority of the international data starts from 2004 or 2005, 
	* countries like BGR and AUS have earlier data but are incomplete. 
	drop if fyear<2004
	drop if fyear > 2016
	
	* Drop the first year of singapore, as the observation is significantly
	* fewer than the later years.
	drop if fic=="GBR"&fyear==2004
	drop if fic=="SGP"&fyear==2004
	drop if fic=="ISR"&fyear==2004
	drop if fic=="BMU"|fic=="CYM"|fic=="ZAF"|fic=="LKA"|fic=="TUR"
	drop if at==.|at==0
 
	foreach x in  cf2at mkt2bk res2at capx2ppe ebitda2at{
		winsor2 `x', cut(1 99) replace 
	}
	
	foreach x in  capx sale{
		replace `x'y3=. if  `x'y3==0
		gen `x'43=`x'q4/(`x'y3/3)
		winsor `x'43, gen(`x'4_3) p(0.03) 
		drop `x'43
		replace `x'4_3=`x'4_3*100
	}
	
	egen mincapx=min(capx4_3)
	egen maxcapx=max(capx4_3)
	drop if capx4_3>500|capx4_3==mincapx
	drop mincapx maxcapx
	drop if at==.|at==0
	drop if fic=="ISR"

	la var cf2at "Cash Flow/Assets"
	la var res2at "Cash/Assets"
	la var ebitda2at "EBITDA/Assets"
	la var capx4_3 "CAPEX Q4/Ave(Q1-Q3)\%"
	la var sale4_3 "Sales Q4/Ave(Q1-Q3)\%"
	la var capx2ppe "CAPEX/PPE"
	la var ppent "Property, Plant and Equipment (Mils)"
	la var sale "Sales (Mils)"
	la var at "Assets (Mils)"
	la var capxy4 "Capital Expenditures (Mils)"
	la var dpy4 "Depreciation and Amortization (Mils)"
	la var mkt2bk "M/B"
	
	* Drop countries with fewer than 11 years of quarterly data
	egen b = nvals(fyear),by(fic)
	drop if b < 11
	drop b
	
	save "$analysis/international_trim5.dta", replace

end /* %> */

********************************************************************************

********************************************************************************
*   CUMULATIVE EFFECT REGRESSION DATASETS 
********************************************************************************

********************************************************************************
* Generating data based on spiking event, 
* defined as spikes in quarter 4 > median q4 level.  

capture program drop clean_ppentq /* %< */
program define clean_ppentq

	* Pull in quarterly ppent data to serve as denominator. 

	use "$analysis/Compustat/fundq.dta", clear
	
	keep if datafqtr!=""
	gen fyearquarter=yq(fyearq, fqtr)
	destring gvkey, replace 
	bysort gvkey fyearquarter: keep if _n==1
	tsset gvkey fyearquarter 
	gen lppentq=l.ppentq
	keep gvkey fyearquarter ppentq lppentq 
 
	save "$analysis/ppentq.dta", replace
	
end /* %> */ 


********************************************************************************
* Regression and output  
 
capture program drop denom /* %< */
program define denom
	
	*use this to define the denominator automatically for different pre-period
	gen pre=(dis<0)
	bysort eventid: egen avecapxpre=mean(capx) if pre==1
	bysort eventid: egen aveppepre=mean(ppent) if pre==1
	order eventid dis capx ppent avecapxpre aveppepre  
	bysort eventid: egen capxpre=max(avecapxpre)
	bysort eventid: egen ppepre=max(aveppepre)
	order eventid dis capx ppent avecapxpre aveppepre capxpre ppepre
	gen capx2capxpre=capx/capxpre
	gen capx2ppepre=capx/ppepre
	winsor2 capx2capxpre capx2ppepre, cut(1 99) replace 
 
 end /* %< */
  
capture program drop build_spikeraw /* %< */
program define build_spikeraw 

	use "$analysis/capxannual_trim5.dta", clear
	
	su capx1_4 capx2_4 capx3_4 capx4_4, de
	bysort gvkey: keep if _n==1
	keep gvkey

	tempfile sampleid
	save "`sampleid'"
	
	forvalues i=1986/2013{
		use "$analysis/quarterlydata.dta", clear
		replace capxq=. if capxq<0
		merge m:1 gvkey using "`sampleid'", keep(3) nogen
		tsset gvkey fyearquarter
		gen basepreq3=(capxq+l.capxq+l2.capxq+l3.capxq)/4
		drop if fyear<`i'-3
		bysort gvkey: egen minyear=min(fyear)
		drop if minyear!=`i'-3
		egen minyq=min(fyearquarter)
		gen eventtime=fyearquarter -minyq-11
		drop if eventtime>12
		gen capxfqsum=capxq if fyear<=`i'
		gen capxfqave=capxq if fyear<=`i'
		bysort gvkey: egen count=count(capxq)
		su capxq, de
		tab fyear
		drop if count<20
		drop count

		preserve
	
		keep if fyear==`i'
		keep gvkey fyear fquarter capxq basepreq3
		drop if capxq==.
		reshape wide capxq basepreq3, i(gvkey fyear) j(fquarter)
		gen baseq14=(capxq1+capxq2+capxq3+capxq4)/4
		gen baseq23=(capxq2+capxq3)/2
		gen baseq13=(capxq1+capxq2+capxq3)/3
		keep gvkey baseq14 baseq23 baseq13 basepreq31 basepreq32 basepreq33 basepreq34
		
		tempfile base`i'
		save "`base`i''"
		
		restore

		keep  gvkey fyearquarter eventtime capxq capxfq* fquarter fyear
		merge m:1 gvkey using "`base`i''", keep(3) nogen
		tsset gvkey fyearquarter
		sort gvkey fyearquarter

		foreach x in q13 q14 q23 preq31 preq32 preq33 preq34{
			gen capx`x'=capxq/base`x'
		}
		
		compress
		save "$analysis/yearnew`i'.dta", replace
	}
 
	forvalues j=4/4{
		#delimit ;
		forvalues i=1986/2013{;
			use "$analysis/yearnew`i'.dta", clear;
			sort gvkey fyearquarter;
			local emin=`j'+1; local emax=`j'+8;
		forvalues e=`emin'/`emax'{;
			bysort gvkey: replace capxfqsum=l.capxfqsum+capxq if eventtime==`e';
			bysort gvkey: replace capxfqave=(l.capxfqsum+capxq)/(`e'-`emin'+2) ///
							       if eventtime==`e';
		};
		foreach x in q13 q14 q23 preq31 preq32 preq33 preq34{;
			gen ave`x'=capxfqave/base`x';
		};
	
		keep gvkey fyearquarter capxq13 capxq14 capxq23 capxpreq* ave* ///
		     eventtime capxq;
	
		foreach x of varlist capxq13 capxq14 capxq23  capxpreq* ave*{;
			winsor `x', gen(w) p(0.03);drop `x';rename w `x';
			replace `x'=. if `x'>5;egen min=min(`x');replace `x'=. if `x'==min;drop min;
		};

		sort gvkey fyearquarter;
		bysort gvkey: gen keepid=(eventtime==`j'&capxq14>1.1291);
		bysort gvkey: egen keepidmax=max(keepid);  
		keep if keepidmax==1;drop keepid keepidmax; gen eventyear=`i';
	
		tempfile year`i'; save "`year`i''"; 
		
		};

	use "`year1986'";   
	forvalues i=1987/2013{; 
		append using   "`year`i''"; 
	};
	
	keep if eventtime<=`j'+8&eventtime>=`j'-12;
	replace eventtime=eventtime-`j';
	order eventtime;
	merge n:1 gvkey fyearquarter using "$analysis/ppentq.dta", keep(1 3) nogen;
	order gvkey eventyear fyearquarter;
	sort gvkey eventyear fyearquarter;
	
	save "$analysis/q`j'spikeraw.dta", replace;
	
	#delimit cr
	}
	
end /* %> */


********************************************************************************
* Event time structure set up 
* Figure 4 panel d utilizes these data


capture program drop build_eventtime /* %< */
program define build_eventtime

	forvalues i=4/4{
		use "$analysis/q`i'spikeraw.dta", clear 
		
		sort gvkey eventyear eventtime
		keep if eventtime==0
		keep gvkey eventyear capxq14 
		gen q`i'spike=1
		drop if capxq14==.
		
		save "$analysis/eventyear`i'.dta", replace
	}

	forvalue i=4/4{
		use "$analysis/eventyear`i'.dta", clear
		
		gen eventid=_n+`i'*1000000
		su eventid
		unique eventid 
		mmerge gvkey using "$analysis/capxannual_trim5pre83.dta", ///
		       type(n:n) unmatched(none)
		drop _merge 
		#delimit ;
		
		sort gvkey eventyear fyear;
		gen dis=fyear-eventyear;
		drop if dis<-3|dis>3;
		gen d3=(dis==-3);gen d2=(dis==-2);
		gen d1=(dis==-1);gen d0=(dis==0);
		gen f1=(dis==1);gen f2=(dis==2);
		gen f3=(dis==3);
		
		la var d1 "D(Lagged 1Y)";la var d2 "D(Lagged 2Y)";
		la var d3 "D(Lagged 3Y)";la var d0 "D(Spike Y)";
		la var f1 "D(Forward 1Y)";la var f2 "D(Forward 2Y)";
		la var f3 "D(Forward 3Y)";
		
		bysort eventid: egen maxdis=max(dis);bysort eventid: egen mindis=min(dis);
		drop if mindis>-2&maxdis<2;drop maxdis mindis;
		#delimit cr
		
		compress
	
		save "$analysis/q`i'event.dta", replace
	}
 
	* Erase mid-step datasets 
	forvalues i=4/4{
		erase "$analysis/q`i'spikeraw.dta"
		erase "$analysis/eventyear`i'.dta"
	}

	forvalues i=1986/2013{
		erase "$analysis/yearnew`i'.dta"
	}
   
end /* %> */
 


********************************************************************************
* Data for cumulative effect plot in Figure 4 Panel D 


capture program drop build_cumulativefig4 /* %< */
program define build_cumulativefig4

	use "$analysis/capxannual_trim5.dta", clear

	bysort gvkey: keep if _n==1
	keep gvkey

	tempfile sampleid
	save "`sampleid'"

	global duration 20

	forvalues i=1985/2013{
		use "$analysis/quarterlydata.dta", clear //
		replace capxq=. if capxq<0
		merge m:1 gvkey using "`sampleid'", keep(3) nogen
		tsset gvkey fyearquarter
		gen basepreq3=(capxq+l.capxq+l2.capxq+l3.capxq)/4
		drop if fyear<`i'-3
		bysort gvkey: egen minyear=min(fyear)
		drop if minyear!=`i'-3

	    * use the average capx in the pre-event years as the base 
		foreach z in 1 2 3 {
			bysort gvkey fyear: egen avecapx=mean(capxq)
			bysort gvkey: gen minavecapx=avecapx if fyear==`i'-`z'
			bysort gvkey: egen basepre`z'y=max(minavecapx)
			drop  minavecapx  avecapx
		}

		egen minyq=min(fyearquarter)
		gen eventtime=fyearquarter -minyq-11
		drop if eventtime>${duration}-4
		gen capxfqsum=capxq if fyear<=`i'
		gen capxfqave=capxq if fyear<=`i'
		bysort gvkey: egen count=count(capxq)
	
		preserve
		
		keep if fyear==`i'
		keep gvkey fyear fquarter capxq basepreq3 basepre1y basepre2y basepre3y
		drop if capxq==.
		reshape wide capxq basepreq3, i(gvkey fyear) j(fquarter)
		gen baseq14=(capxq1+capxq2+capxq3+capxq4)/4
		gen baseq23=(capxq2+capxq3)/2
		gen baseq13=(capxq1+capxq2+capxq3)/3
		keep gvkey baseq14 baseq23 baseq13 basepreq31 basepreq32 basepreq33 ///
			basepreq34 basepre1y basepre2y basepre3y
		
		tempfile base`i'
		save "`base`i''"
	
		restore

		keep  gvkey fyearquarter eventtime capxq capxfq* fquarter fyear
		merge m:1 gvkey using "`base`i''", keep(3) nogen
		tsset gvkey fyearquarter
		sort gvkey fyearquarter

		foreach x in q13 q14 q23 pre1y pre2y pre3y preq31 preq32 preq33 preq34{
			gen capx`x'=capxq/base`x'
		}

	compress
	gen eventyear=`i'
	
	save "$analysis/year`i'.dta", replace
	}


	forvalues j=4/4{
	#delimit ;
		forvalues i=1985/2013{;
			use "$analysis/year`i'.dta", clear;
			sort gvkey fyearquarter;
		forvalues e=2/12{;
			bysort gvkey: replace capxfqsum=l.capxfqsum+capxq if eventtime==`e';
			bysort gvkey: replace capxfqave=(l.capxfqsum+capxq)/(`e') if eventtime==`e';
		};
		foreach x in q13 q14 q23 pre1y pre2y pre3y preq31 preq32 preq33 preq34{;
			gen ave`x'=capxfqave/base`x';
		};
	
		keep gvkey fyearquarter capxq13 capxq14 capxq23 capxpre* ave* eventtime;

		foreach x of varlist capxq13 capxq14 capxq23  capxpre* {;
			winsor2 `x', cut(3 97) replace;
		};

		foreach x of varlist  ave*{;
			winsor2 `x', cut(1 99) replace;
		};

		sort gvkey fyearquarter;
		su capxq14 if eventtime==`j', de; local m=r(p50);
		gen keepid=(eventtime==`j'&capxq14>1.1291&capxq14!=.);

		bysort gvkey: egen keepidmax=max(keepid); keep if keepidmax==1;drop keepid keepidmax;
		collapse (mean) capxq13 capxq14 capxq23 capxpre* ave*, by(eventtime);
		sort eventtime;  
		
		tempfile year`i'; save "`year`i''"; 
		};

		use "`year1985'";   
		forvalues i=1986/2013{; 
			append using "`year`i''"; 
		};
		
		collapse (mean) capxq13 capxq14 capxq23  capxpre* ave* ///
			     (sd)   sdcapxq13=capxq13 sdcapxq14=capxq14 sdcapxq23=capxq23 ///
					    sdcapxpre1y=capxpre1y sdcapxpre2y=capxpre2y ///
						sdcapxpre3y=capxpre3y sdcapxpreq31=capxpreq31 ///
						sdcapxpreq32=capxpreq32 sdcapxpreq33=capxpreq33 ///
						sdcapxpreq34=capxpreq34 sdavepreq31=avepreq31 ///
						sdavepreq32=avepreq32 sdavepreq33=avepreq33 ///
						sdavepreq34=avepreq34 sdaveq13=aveq13 sdaveq14=aveq14 ///
						sdaveq23=aveq23 sdavepre1y=avepre1y sdavepre2y=avepre2y ///
						sdavepre3y=avepre3y, by(eventtime) ;
			 
		foreach x of varlist capxq13-sdavepre3y{;
			replace `x'=`x'*100;
		};
	
		su capxq14 if eventtime==`j';
		save "$analysis/responseq`j'.dta", replace;
		#delimit cr
	}

	* Erase mid-step datasets 
	forvalues i=1985/2013{
		erase "$analysis/year`i'.dta"
	}

end /* %> */


********************************************************************************
*   OTHER DATASETS %<
********************************************************************************

********************************************************************************
* BLS_PPI Dataset
* Table 10 utilizes these data

capture program drop build_bls_ppi /* %< */
program define build_bls_ppi 

	clear 
	
	* machinery 3331-9
	import delimited "$analysis/BLS_PPI/Rawdata/pc.industry.txt", varnames(1)
	save "$analysis/industrycode.dta", replace

	clear 
	import delimited "$analysis/BLS_PPI/Rawdata/pc.data.0.Current.txt", varnames(1)

	keep if substr(series_id,1,3)=="PCU"
	drop footnotes
	gen naics6=substr(series_id,4,6)
	gen industry_code=substr(series_id, 4,6)
	unique industry_code
	gen product_code=substr(series_id,10,.)
	
	foreach x in naics6 industry_code product_code{
		replace `x'=strtrim(`x')
	}
	
	keep if product_code==naics6
	unique naics6
	
	merge n:1 industry_code using "$analysis/industrycode.dta", keep(1 3) nogen
	drop if substr(naics6,5,2)=="--" |substr(naics6,4,3)=="---"|substr(naics6,6,1)=="-"
	unique naics6
	drop series_id 
	drop if real(naics6)==.
	unique naics6
	destring naics6, replace
	gen month=substr(period, 2, 2)
	destring month, replace
	drop period
	drop if month==13
	bysort naics6 year: gen count=_N
	tab count
	drop if count<12
	unique naics6
	bysort naics6 year: egen ave_ppi=mean(value)
	gen ppi_ratio=value*100/ave
	rename value ppi
	drop count industry_code
	order naics6 year month ppi ave_ppi ppi_ratio
	
	save "$analysis/ppinaics6.dta", replace

end /* %> */

********************************************************************************
* M3 Survey Data 
* Figure 8 utilizes these data 
* Table 10 utilizes these data 

capture program drop build_m3survey /* %< */
program define build_m3survey 

/*Item definition
series: 
    1 digit: seasonally adjusted: A/U
    2-4 digit: industry
    5-6 digit: data item
data item:
    VS – Value of Shipments 
    NO – New Orders
    UO – Unfilled Orders
    TI – Total Inventories
    MI – Materials and Supplies Inventories 
    WI – Work in Process Inventories
    FI – Finished Goods Inventories
    IS – Inventories to Shipments Ratios
    US – Unfilled Orders to Shipments Ratios		
		
NAICS aggregate series totals:
    MTM Total Manufacturing
    MXT Manufacturing Excluding Transportation 
    MXD Manufacturing Excluding Defense
    MTU Manufacturing with Unfilled Orders 
    MDM Durable Goods	
	MNM Nondurable Goods
	CMS Construction Materials and Supplies 
	ITI Information Technology Industries 
	CRP Computers and Related Products 
	MVP Motor Vehicles and Parts
    TCG Capital Goods
    NDE Nondefense Capital Goods
    NXA Nondefense Capital Goods Excluding Aircraft
    DEF Defense Capital Goods
    COG Consumer Goods
    CDG Consumer Durable Goods
    CNG Consumer Nondurable Goods
    DXT Durable Goods Excluding Transportation 
	DXD Durable Goods Excluding Defense
   
   33E Industrial machinery manufacturing
	
	
SIC aggregate series totals: 	
    MTM Total Manufacturing
   MXT Manufacturing Excluding Transportation
   MTU All Manufacturing with Unfilled Orders
   MXD Manufacturing Excluding Defense
   DXD Durable Excluding Defense
   MDM Durable Goods Total
   MDU Durable Goods with Unfilled Orders
   MNM Nondurable Goods Total
   MNU Nondurable Goods with Unfilled Orders	
	
	COS Consumer Staples
    MAE Machinery and Equipment
    BUS Business Supplies
    TCG Total Capital Goods
    NDE Nondefense Capital Goods
    NXA Nondefense Capital Goods Excluding Aircraft and Parts	
    HDG Household Durable Goods	

   35M Industrial Machinery and Equipment

*/

 
	* Import and clean the NAICS based data invp uop nop isp  vsp usp 

	clear
	
	foreach x in invp uop nop isp usp{
		clear
		import excel "$analysis/M3 Survey/naics`x'.xls", sheet("m3-outp-`x'")
		
		#delimit ;
		rename A series; rename B year; rename C m1; rename D m2; rename E m3;
		rename F m4; rename G m5; rename H m6; rename I m7; rename J m8;
		rename K m9; rename L m10; rename M m11; rename N m12;
		destring m1-m12, replace ;
		reshape long m, i(series year) j(month);
		gen time=ym(year, month); format time %tm;
		rename m `x';
		gen s_adjusted=substr(series,1,1); 
		gen industry=substr(series,2,3); 
		gen item=substr(series,5,2);
		drop series;
	
		tempfile `x';
		save "``x''";
		#delimit cr
	}
	
	* Some format issues with naicsvsp file format
	clear
	import excel "$analysis/M3 Survey/naicsvsp.xls", sheet("m3-outp-vsp")
	
	#delimit ;
	rename A series; rename B year; rename C m1; rename D m2; rename E m3;
	rename F m4; rename G m5; rename H m6; rename I m7; rename J m8;
	rename K m9; rename L m10; rename M m11; rename N m12;
	#delimit cr

	foreach x in m4 m5 m6 m7 m8 m9 m10 m11 m12{
		replace `x'=" " if `x'=="NA"
	} 
	
	destring m4-m12, replace
	reshape long m, i(series year) j(month)
	gen time=ym(year, month)
	format time %tm
	order series time 
	rename m vsp
	gen s_adjusted=substr(series,1,1)
	gen industry=substr(series,2,3)

	tempfile naicsvsp
	save "`naicsvsp'", replace
	
	clear
	use "`invp'"
	
	reshape wide invp, i(s_adjusted industry time) j(item) string
	merge 1:1 s_adjusted industry time using "`uop'", nogen
	merge 1:1 s_adjusted industry time  using "`nop'", nogen
	merge 1:1 s_adjusted industry time  using "`isp'", nogen
	merge 1:1 s_adjusted industry time  using "`usp'", nogen
	merge 1:1 s_adjusted industry time using "`naicsvsp'", nogen
	 
	#delimit ;
	rename invpFI fi ; rename invpMI mi; rename invpTI ti;
	rename invpWI wi; rename uop uo; rename nop no;
	rename isp is; rename usp us; rename vsp vs;
	#delimit cr
	
	* Rename vsp vs
	order s_adjusted time industry year month 
	drop item series

	save "$analysis/naics.dta", replace

	* Import historical data from 1960-2000ish, using SIC definition

	*invp uop nop isp  vsp usp 
	clear
  
	foreach x in uop nop isp vsp usp invp{
		clear
		import excel "$analysis/M3 Survey/`x'.xls", sheet("`x'")
		#delimit ;
		rename A series; rename B year; rename C m1; rename D m2; rename E m3;
		rename F m4; rename G m5; rename H m6; rename I m7; rename J m8;
		rename K m9; rename L m10; rename M m11; rename N m12;
		reshape long m, i(series year) j(month);
		gen time=ym(year, month); format time %tm;
		rename m `x';
		gen s_adjusted=substr(series,1,1);
		gen industry=substr(series,2,3); 
		gen item=substr(series,5,2);
		drop series;
		
		tempfile `x';
		save "``x''";
	
		#delimit cr
	}
	
	use "`invp'", clear
	
	reshape wide invp, i(s_adjusted industry time) j(item) string
	merge 1:1 s_adjusted industry time using "`uop'", nogen
	merge 1:1 s_adjusted industry time  using "`nop'", nogen
	merge 1:1 s_adjusted industry time  using "`isp'", nogen
	merge 1:1 s_adjusted industry time  using "`vsp'", nogen
	merge 1:1 s_adjusted industry time  using "`usp'", nogen
	
	#delimit ;
	rename invpFI fi ; rename invpMI mi; rename invpTI ti;
	rename invpWI wi; rename uop uo; rename nop no;
	rename isp is; rename usp us; rename vsp vs;
	#delimit cr
	
	order s_adjusted  time industry  year month 
	drop item

	save "$analysis/sic.dta", replace 

end /* %> */

********************************************************************************
* RateWatch Datasets
* Figure 9 utilizes these data 

capture program drop clean_ratewatch
program define clean_ratewatch

	gen loandate=date(date, "YMD")
	format loandate %td
	gen year=year(loandate)
	gen month=month(loandate)
	gen loanmonth=ym(year, month)
	format loanmonth %tm
	order prod_code prod_name loanmonth  accountnumber applicablemeasurement 
	drop if applicablemeasurement ==0
	gen item=1 if substr(prod_name,-4,4)=="Rate"
	replace item=2 if substr(prod_name,-4,4)=="Term"
	drop tier_min tier_max 
	rename  applicablemeasurement  variable
	keep variable accountnumber loanmonth item year month 
	reshape wide variable, i(accountnumber loanmonth year month) j(item)
	rename variable1 rate 
	rename variable2 term
	winsor2 rate, replace cut(1 99)
	
 end
 
capture program drop build_ratewatch /* %< */
program define build_ratewatch 

	use "$analysis/RateWatch/interestrate.dta", clear
	
	format loanmonth %tm
	keep loanmonth tcmnom_y3 tcmnom_y5 tcmnom_y2 tcmnom_y1 tcmnom_y7 tcmnom_y10 ///
		 tcmnom_y20 tcmnom_y30

	save "$analysis/interestrate.dta", replace
 
 *keep the most common term for each contract
 
*************************** 
* Commercial equipment 250k
	forvalues i=2002/2017{
		clear
		import delimited "$analysis/RateWatch/RW_042018/loanRateData_CommEqup250K_`i'.txt", delimiter("|") 
		clean_ratewatch

		tempfile comequp`i'
		save "`comequp`i''"
	}
	
	use "`comequp2002'"
	
	forvalues i=2003/2017{
		append using "`comequp`i''"
	}

	bysort loanmonth: gen obs=_N
	merge n:1 loanmonth using "$analysis/interestrate.dta", keep(1 3) nogen
	tab term
	keep if term==60 
	replace rate=rate-tcmnom_y5
	
	save "$analysis/commequp250k.dta", replace

*************************** 
* Commercial operating loan

	forvalues i=2002/2017{
		clear
		import delimited "$analysis/RateWatch/RW_042018/loanRateData_CommOperLn50K_`i'.txt", delimiter("|") 
		clean_ratewatch

		tempfile commoper`i'
		save "`commoper`i''"
	}
	
	use "`commoper2002'"
	
	forvalues i=2003/2017{
		append using "`commoper`i''"
	}

	bysort loanmonth: gen obs=_N
	merge n:1 loanmonth using "$analysis/interestrate.dta", keep(1 3) nogen
	tab term
	keep if term==12
	replace rate=rate-tcmnom_y1

	save "$analysis/commoper50k.dta", replace
 
**********************************
* Commercial real estate 1 million

	forvalues i=2002/2017{
		clear
		import delimited "$analysis/RateWatch/RW_042018/loanRateData_CommRelEst1Mil_`i'.txt", delimiter("|") 
		drop if prod_code=="00382DOWNPAY"
		clean_ratewatch

		tempfile commreal`i'
		save "`commreal`i''"
	}
	
	use "`commreal2002'"
	
	forvalues i=2003/2017{
		append using "`commreal`i''"
	}
	
	bysort loanmonth: gen obs=_N
	merge n:1 loanmonth using "$analysis/interestrate.dta", keep(1 3) nogen
	tab term
	keep if term==60
	replace rate=rate-tcmnom_y5

	save "$analysis/commreal1mil.dta", replace

********************
* Personal unsecured
 
	forvalues i=2002/2017{
		clear
		import delimited "$analysis/RateWatch/RW_042018/loanRateData_PersonalUnsecLoan_`i'.txt", delimiter("|") 
		
		gen loandate=date(date, "YMD")
		format loandate %td
		gen loanmonth=ym(year(loandate), month(loandate))
		gen year=year(loandate)
		gen month=month(loandate)
		format loanmonth %tm
		order prod_code prod_name loanmonth  accountnumber applicablemeasurement 
		drop if applicablemeasurement ==0
		gen item=0 if substr(prod_name,-4,4)=="Term"
		replace item=1 if substr(prod_name,-1,1)=="1"
		replace item=2  if substr(prod_name,-1,1)=="2"
		replace item=3  if substr(prod_name,-1,1)=="3"
		replace item=4  if substr(prod_name,-1,1)=="4"
 
		drop tier_min tier_max 
		rename  applicablemeasurement  variable
		keep variable accountnumber loanmonth item year month 
		reshape wide variable, i(accountnumber year month loanmonth) j(item)
	
		rename variable0 term 
		rename variable1 rate1
		rename variable2 rate2
		rename variable3 rate3
		rename variable4 rate4
 
		egen ratemin=rowmin(rate1 rate2 rate3 rate4)
		drop rate1-rate4
		rename ratemin rate
		winsor2 rate, replace cut(1 99)

		tempfile personalunsec`i'
		save "`personalunsec`i''"
	}
	
	use "`personalunsec2002'"
	
	forvalues i=2003/2017{
		append using "`personalunsec`i''"
	}

	bysort loanmonth: gen obs=_N
	merge n:1 loanmonth using "$analysis/interestrate.dta", keep(1 3) nogen
	tab term 
	keep if term==36 
	replace rate=rate-tcmnom_y3

	save "$analysis/personalunsec.dta", replace
 
***************************
* Business loan secured 50k

	forvalues i=2002/2017{
		clear
		import delimited "$analysis/RateWatch/RW_042018/loanRateData_BusLnSec50K_`i'.txt", delimiter("|") 
		gen loandate=date(date, "YMD")
		format loandate %td
		gen loanmonth=ym(year(loandate), month(loandate))
		gen year=year(loandate)
		gen month=month(loandate)
		format loanmonth %tm
		order prod_code prod_name loanmonth  accountnumber applicablemeasurement 
		drop if applicablemeasurement ==0
		gen item=1 if substr(prod_name,-4,4)=="Rate"
		replace item=2 if substr(prod_name,-4,4)=="Term"
		drop tier_min tier_max 
		rename  applicablemeasurement  variable
		keep variable accountnumber loanmonth item year month
		winsor2 item, replace cut(1 99)

		tempfile buslnsec50k`i'
		save "`buslnsec50k`i''"
	}
	
	use "`buslnsec50k2002'"

	forvalues i=2003/2017{
		append using "`buslnsec50k`i''"
	}
	
	bysort loanmonth: gen obs=_N
	rename variable rate 
	drop item
	merge n:1 loanmonth using "$analysis/interestrate.dta", keep(1 3) nogen
	replace rate=rate-tcmnom_y1

	save "$analysis/buslnsec50k.dta", replace

end /* %> */

********************************************************************************
* RD Decomposition Dataset
* Appendix Figure IA.II utilizes these data 

capture program drop build_rddecomp
program define build_rddecomp

	clear
	import excel "$analysis/RD/RD decomposition 2008.xls", sheet("Data Table") firstrow
	drop if naics==.
	drop D industry
	replace salary=salary+benefit
	drop benefit
	foreach x in salary stock staffing equipment materials lease depreciation other{
		gen `x'_08=`x'/total
		replace `x'_08=. if `x'==0
		drop `x'
	}
	drop total
	tempfile rd08
	save "`rd08'"

	clear
	import excel "$analysis/RD/RD decomposition 2009.xls", sheet("Data Table") firstrow
	drop N industry
	foreach x in total stock staffing equipment materials lease depreciation ///
	                   other1 other2 other3{
		replace `x'="0" if `x'=="*"|`x'=="D"
		destring `x', replace
	}	
	gen other=other1+other2+other3
	drop other1 other2 other3	
	foreach x in salary stock staffing equipment materials lease depreciation other{
		gen `x'_09=`x'/total
		replace `x'_09=. if `x'==0
		drop `x'
	}	
	drop total
	tempfile rd09
	save "`rd09'"

	clear
	import excel "$analysis/RD/RD decomposition 2010.xls", sheet("Data Table") firstrow	
	drop industry	
	foreach x in naics total stock staffing equipment  materials lease depreciation other1 other2 {
		replace `x'="0" if `x'=="*"|`x'=="D"
		destring `x', replace
	}	
	replace total=salary+stock+staffing+equipment+materials+lease+depreciation+other1+other2 if total==0
	gen other=other1+other2
	drop other1 other2	
	foreach x in salary stock staffing equipment materials lease depreciation other{
		gen `x'_10=`x'/total
		replace `x'_10=. if `x'==0
		drop `x'
	}	
	drop if naics==.
	drop total
	tempfile rd10
	save "`rd10'"

	clear
	import excel "$analysis/RD/RD decomposition 2011.xlsx", sheet("Sheet1") firstrow
	drop industry  M
	drop if naics==. 	
	foreach x in salary stock staffing equipment materials lease depreciation other1 others2{
		replace `x'="0" if `x'=="*"|`x'=="D"
		destring `x', replace
	}	
	replace total=salary+stock+staffing+equipment+materials+lease+depreciation+other1+others2 if total==0
	gen other=other1+others2
	drop other1 others2	
	foreach x in salary stock staffing equipment materials lease depreciation other{
		gen `x'_11=`x'/total
		replace `x'_11=. if `x'==0
		drop `x'
	}
	drop total
	tempfile rd11
	save "`rd11'"

	clear
	import excel "$analysis/RD/RD decomposition 2012.xlsx", sheet("Sheet1") firstrow
	drop M Industry*
	foreach x in stock staffing equipment depreciation other1 other2{
		replace `x'="0" if `x'=="*"|`x'=="D"
		destring `x', replace
	}
	gen other=other1+other2
	drop other1 other2
	foreach x in salary stock staffing equipment materials lease depreciation other{
		gen `x'_12=`x'/total
		replace `x'_12=. if `x'==0
		drop `x'
	}	
	drop total
	tempfile rd12
	save "`rd12'"

	clear
	import excel "$analysis/RD/RD decomposition 2013.xlsx", sheet("Sheet1") firstrow
	drop industry
	gen other=other1+others2
	drop other1 others2
	rename stuffing staffing  
	foreach x in salary stock staffing equipment materials lease depreciation other{
		gen `x'_13=`x'/total
		replace `x'_13=. if `x'==0
		drop `x'
	}	
	drop total

	merge 1:1 naics using "`rd12'", nogen keep(3)
	merge 1:1 naics using "`rd11'", nogen keep(3)
	merge 1:1 naics using "`rd10'", nogen keep(3)
	merge 1:1 naics using "`rd09'", nogen keep(3)
	merge 1:1 naics using "`rd08'", nogen keep(3)
	
	order naics salary* stock* staffing* equipment* materials* lease* depreciation* other*
	foreach x in salary stock staffing equipment materials lease depreciation other{
		egen `x'_m=rowmean(`x'_08 `x'_09 `x'_10 `x'_11 `x'_12 `x'_13)
		drop `x'_08 `x'_09 `x'_10 `x'_11 `x'_12 `x'_13
	}
	
	egen compensation_m=rowtotal(salary_m stock_m)

	* Delete the info for 6 digits, use lower digits to define industry
	drop if naics>10000

	* Naics=0 for all manufacturing, 1 for all non-manufacturing
	
	save "$analysis/rddecomp.dta", replace

end /* %> */

********************************************************************************
*   MAIN
********************************************************************************

capture program drop main /* %> */
program define main
   
    log using "$outputdir/make-kinky.log", replace
	
    ********************************************* 
	* Preliminary Datasets *
    ********************************************* 
	
	* segment information *
	clean_segment
	
	* executive ownership data *
	clean_executive
	
	* earnings management data *
	clean_earnings
	
	* accountingy *
	build_accountingy
	
	* quarterly data *
	clean_quarterly
	
	* merged data *
	build_mergeddata
	
	********************************************* 
	* Main Datasets *
    *********************************************
	
	* capxannual, capxannual_p, capxannual_trim5, and capxannual_trim5pre83 
	build_capxannual
	
	* International data 
	build_international
	
	********************************************* 
	* Regression Datasets *
    *********************************************
	
	* Quarterly ppent data for denominator 
	clean_ppentq
	
	* Regression and output
	build_spikeraw 
	  
	* Event time set up
	build_eventtime
	
	
	* Data for cumulative effect plot in figure 4d
	build_cumulativefig4
	
	********************************************* 
	* Other Datasets *
    *********************************************
	
	* BLS_PPI
	build_bls_ppi
	
	* M3 Survey Data
	build_m3survey
	
	* RateWatch Datasets
	build_ratewatch
	
	* RD Decomposition Dataset 
	build_rddecomp
	
	log close
 
end /* %> */	































